
# MLLM data processing
# 
# from seed2.seed_llama_tokenizer import ImageTokenizer
from my_py_toolkit.llms.tokenizers.image_tokenizers import ImageTokenizer


# 1. Image -> tokens
# 1.1 Requires image-tokenizer weights; the SEED tokenizer is used for now.
# 1.2 Requires diffusion model weights.
# NOTE(review): the tokenizer path is empty here — presumably it has to be
# pointed at a real checkpoint before this script is run; confirm against
# ImageTokenizer.
image_tokenizer_path = ''
diffusion_path = "stabilityai/stable-diffusion-2-1-unclip"
device = 'cuda'
# Image size handed to ImageTokenizer. Kept as a named setting so the
# constructor call below does not hard-code a separate literal.
image_size = 224
image_tokenizer = ImageTokenizer(
    model_path=image_tokenizer_path,
    load_diffusion=True,
    diffusion_model_path=diffusion_path,
    device=device,
    image_size=image_size,
)

# 2. Video -> tokens

# 3. Audio -> tokens

# 4. Music -> tokens

